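# CNN_call takes the path to a CSV holding the cleaned docs ('NOTE_TEXT' column) and their categories ('cat' column), a param# for which model to run, and the index of the cross-validation fold to hold out. It returns the fitted model, loss, accuracy, fpr, tpr, auc, test predictions, the train/test docs and labels (the docs are needed for later gradient calcs), and the validation/training loss histories.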
def CNN_call(ds,model_num,fold_num):
    """Train and evaluate one rule-feature classifier on one cross-validation fold.

    The notes CSV is split into 10 stratified folds (unshuffled, so the split
    is the same on every call); fold ``fold_num`` is held out as the test set
    and the remaining folds are used for training. Every document is turned
    into a feature vector with one entry per rule in ``AllRules.csv`` (read
    from the current working directory): the rule's ``Score`` when the rule
    text occurs in the lower-cased document, ``-1`` otherwise.

    Args:
        ds: Path (or anything ``pandas.read_csv`` accepts) of a CSV with a
            ``NOTE_TEXT`` column (documents) and a ``cat`` column (labels).
        model_num: Which model to run. ``0``-``7`` select one of the dense
            Keras networks in ``_DENSE_ARCHITECTURES``; ``9`` selects
            scikit-learn logistic regression.
        fold_num: Index into the 10 folds of the one to hold out for testing.

    Returns:
        For ``model_num`` 0-7, a 13-tuple::

            (model, loss, acc, fpr, tpr, auc, test_preds,
             train_doc, test_doc, train_val, test_val, val_loss, train_loss)

        where ``test_preds`` are the sigmoid outputs on the test fold and
        ``val_loss`` / ``train_loss`` are the per-epoch histories.

        For ``model_num`` 9, a 17-tuple: the same 13 fields followed by
        ``tn, fp, fn, tp`` from the confusion matrix. Here ``test_preds`` are
        predicted class labels and ``loss``, ``val_loss`` and ``train_loss``
        are the placeholder ``-100`` (no loss is computed for this model).

    Raises:
        ValueError: If ``model_num`` is not one of the supported values.
    """
    # Reject unsupported models before doing any I/O or feature extraction;
    # otherwise the function would do all the work and silently return None.
    if model_num != 9 and model_num not in _DENSE_ARCHITECTURES:
        raise ValueError(
            'unsupported model_num {!r}: expected 0-7 (dense networks) '
            'or 9 (logistic regression)'.format(model_num))

    import pandas as pd
    import numpy as np

    df = pd.read_csv(ds)
    categories = np.array(df['cat'])
    clean_docs = np.array(df['NOTE_TEXT'])

    from sklearn.model_selection import StratifiedKFold
    folds = list(StratifiedKFold(n_splits=10).split(clean_docs, categories))
    train_idx, test_idx = folds[fold_num]

    train_doc, test_doc = clean_docs[train_idx], clean_docs[test_idx]
    train_val, test_val = categories[train_idx], categories[test_idx]

    rules = pd.read_csv('AllRules.csv')
    print(len(rules))
    rule = np.array(rules['Rule'])
    score = np.array(rules['Score'])

    train_weights = _rule_feature_matrix(train_doc, rule, score)
    test_weights = _rule_feature_matrix(test_doc, rule, score)

    if model_num == 9:
        (model, acc, fpr, tpr, auc, test_preds,
         tn, fp, fn, tp) = _run_logistic_regression(
            train_weights, train_val, test_weights, test_val)
        # No loss is computed for logistic regression; -100 is a placeholder
        # so the leading 13 fields line up with the Keras return value.
        loss = val_loss = train_loss = -100
        return (model, loss, acc, fpr, tpr, auc, test_preds,
                train_doc, test_doc, train_val, test_val,
                val_loss, train_loss, tn, fp, fn, tp)

    (model, loss, acc, fpr, tpr, auc, test_preds,
     val_loss, train_loss) = _run_dense_network(
        _DENSE_ARCHITECTURES[model_num],
        train_weights, train_val, test_weights, test_val)
    return (model, loss, acc, fpr, tpr, auc, test_preds,
            train_doc, test_doc, train_val, test_val, val_loss, train_loss)


# Hidden-layer stacks of the dense networks, keyed by model_num.
# ('dense', units) is a ReLU Dense layer, ('dropout', rate) a Dropout layer.
# Every stack starts with a dense layer (it receives the input shape) and is
# followed by a single sigmoid output unit when the model is built.
_DENSE_ARCHITECTURES = {
    0: (('dense', 100), ('dropout', 0.1),
        ('dense', 100), ('dropout', 0.1)),
    1: (('dense', 100), ('dense', 100), ('dropout', 0.1), ('dense', 100)),
    2: (('dense', 100),) * 9 + (('dropout', 0.1), ('dense', 100)),
    3: ((('dense', 50),) * 5 + (('dense', 100),) * 5
        + (('dropout', 0.1), ('dense', 100))),
    4: (('dense', 100), ('dropout', 0.1),
        ('dense', 100), ('dropout', 0.1),
        ('dense', 100), ('dropout', 0.1)),
    5: (('dense', 100), ('dropout', 0.25),
        ('dense', 100), ('dropout', 0.25)),
    6: (('dense', 50), ('dropout', 0.5),
        ('dense', 100), ('dropout', 0.5),
        ('dense', 100)),
    7: (('dense', 100), ('dropout', 0.25),
        ('dense', 100), ('dropout', 0.25),
        ('dense', 100), ('dropout', 0.25),
        ('dense', 100)),
}


def _rule_feature_matrix(docs, rule, score):
    """Return a (len(docs), len(rule)) float matrix of rule-match features.

    Entry [n, i] is ``score[i]`` when ``rule[i]`` is a substring of the
    lower-cased ``docs[n]`` and ``-1`` otherwise.
    """
    import numpy as np

    features = np.full((len(docs), len(rule)), -1.0)
    for row, doc in enumerate(docs):
        # Only the document is lower-cased; rule strings are used as-is, so a
        # rule containing upper-case characters can never match.
        text = doc.lower()
        for col, (pattern, value) in enumerate(zip(rule, score)):
            if pattern in text:
                features[row, col] = value
    return features


def _run_logistic_regression(train_weights, train_val, test_weights, test_val):
    """Fit logistic regression and return its test-fold metrics.

    Returns ``(model, acc, fpr, tpr, auc, test_preds, tn, fp, fn, tp)`` where
    ``test_preds`` are the predicted class labels.
    """
    from sklearn.linear_model import LogisticRegression
    import sklearn.metrics as metrics

    model = LogisticRegression()
    model.fit(train_weights, train_val)
    test_preds = model.predict(test_weights)
    acc = model.score(test_weights, test_val)

    # The ROC curve needs a continuous score: hard labels give only a single
    # operating point. Column 1 of predict_proba is the probability of the
    # larger class label (the positive class for 0/1 labels), matching the
    # sigmoid scores the dense networks feed to roc_curve.
    test_scores = model.predict_proba(test_weights)[:, 1]
    fpr, tpr, _ = metrics.roc_curve(test_val, test_scores)
    auc = metrics.auc(fpr, tpr)

    tn, fp, fn, tp = metrics.confusion_matrix(test_val, test_preds).ravel()
    return model, acc, fpr, tpr, auc, test_preds, tn, fp, fn, tp


def _run_dense_network(hidden_layers, train_weights, train_val,
                       test_weights, test_val):
    """Build, train and evaluate one dense Keras network.

    ``hidden_layers`` is one entry of ``_DENSE_ARCHITECTURES``. Returns
    ``(model, loss, acc, fpr, tpr, auc, test_preds, val_loss, train_loss)``.
    """
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.layers import Dropout
    from keras.callbacks import ModelCheckpoint
    import sklearn.metrics as metrics

    model = Sequential()
    # Only the first layer is told the input width (one feature per rule).
    first_layer_kwargs = {'input_shape': (train_weights.shape[1],)}
    for kind, size in hidden_layers:
        if kind == 'dense':
            model.add(Dense(size, activation='relu', **first_layer_kwargs))
            first_layer_kwargs = {}
        else:
            model.add(Dropout(size))
    model.add(Dense(1, activation='sigmoid'))
    # The metric is named 'acc' so that validation accuracy is logged as
    # 'val_acc', the key the checkpoint below monitors. Keras releases that
    # report metrics under the exact name given here would log
    # metrics=['accuracy'] as 'val_accuracy', and no checkpoint would ever be
    # saved. NOTE(review): confirm against the installed Keras version.
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['acc'])

    # Keep the weights of the epoch with the best validation accuracy. The
    # file lives in the working directory and is overwritten by every call.
    filepath = "weights.best.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                                 save_best_only=True, mode='max')

    history = model.fit(train_weights, train_val, epochs=25, verbose=2,
                        validation_split=.1, callbacks=[checkpoint])
    model.load_weights(filepath)
    val_loss = history.history['val_loss']
    train_loss = history.history['loss']

    loss, acc = model.evaluate(test_weights, test_val, verbose=2)
    # Flatten the (n_samples, 1) sigmoid output to a 1-D score vector.
    test_preds = model.predict(test_weights).ravel()
    fpr, tpr, _ = metrics.roc_curve(test_val, test_preds)
    auc = metrics.auc(fpr, tpr)

    return (model, loss, acc, fpr, tpr, auc, test_preds,
            val_loss, train_loss)